library(tidyverse)
library(tidyverse)
library(maps)
library(usmap)
library(viridis)
library(patchwork)
facilities <- read.csv("data/all_health_facilities.csv")
insurance <- read.csv("data/insurance.csv")
inpatients <- read_csv("data/medicare_inpatients.csv")
medicare <- read_csv("data/medicare.csv")

Inpatient Data

From data.cms.gov: The Inpatient Utilization and Payment Public Use File (Inpatient PUF) provides information on inpatient discharges for Medicare fee-for-service beneficiaries. The Inpatient PUF includes information on utilization, payment (total payment and Medicare payment), and hospital-specific charges for the more than 3,000 U.S. hospitals that receive Medicare Inpatient Prospective Payment System (IPPS) payments. The PUF is organized by hospital and Medicare Severity Diagnosis Related Group (MS-DRG) and covers Fiscal Year (FY) 2017. MS-DRGs included in the PUF represent more than 7 million discharges or 75 percent of total Medicare IPPS discharges.

inpatients <- inpatients %>%
  mutate(state = as.factor(state),
         drg = as.factor(str_extract(drg_definition, "\\d{3}")),
         drg_definition = as.factor(str_remove(drg_definition, "\\d{3} - ")),
         average_outofpocket = average_total_payments - average_medicare_payments,
         city = as.factor(city))
medi <- left_join(inpatients, medicare, by = "name")
medi_filt <- medi %>%
  filter(!is.na(id.y))
cities <- medi_filt %>%
  group_by(city.x, ) %>%
  summarize(mean = mean(average_outofpocket)) %>%
  arrange(desc(mean))

cities_loc <- medi_filt %>%
  select(city.x, state.x, long, lat)

cities <- left_join(cities, cities_loc, by = "city.x") %>%
  mutate(mean = round(mean, 2))
inpatients %>%
  group_by(drg_definition) %>%
  summarize(mean_outofpocket_costs = mean(average_outofpocket)) %>%
  arrange(desc(mean_outofpocket_costs)) %>%
  head(10)
## # A tibble: 10 x 2
##    drg_definition                                           mean_outofpocket_co…
##    <fct>                                                                   <dbl>
##  1 Simultaneous Pancreas/Kidney Transplant                                35594.
##  2 Heart Transplant Or Implant Of Heart Assist System W Mcc               31741.
##  3 Liver Transplant W Mcc Or Intestinal Transplant                        28727.
##  4 Lung Transplant                                                        26213.
##  5 Allogeneic Bone Marrow Transplant                                      22189.
##  6 Liver Transplant W/O Mcc                                               19497.
##  7 Interstitial Lung Disease W/O Cc/Mcc                                   17716.
##  8 Dental & Oral Diseases W Mcc                                           16886.
##  9 Ecmo Or Trach W Mv >96 Hrs Or Pdx Exc Face, Mouth & Nec…               14077.
## 10 Intracranial Vascular Procedures W Pdx Hemorrhage W Mcc                13764.
medicare_ratings <- medicare %>%
  mutate(
    effectiveness_of_care_national_comparison = case_when(
      effectiveness_of_care_national_comparison == "Not Available" ~ as.character(NA),
      TRUE ~ effectiveness_of_care_national_comparison),
    timeliness_of_care_national_comparison = case_when(
      timeliness_of_care_national_comparison == "Not Available" ~ as.character(NA),
      TRUE ~ timeliness_of_care_national_comparison),
    timeliness_of_care_national_comparison = as.factor(timeliness_of_care_national_comparison),
    effectiveness_of_care_national_comparison = as.factor(effectiveness_of_care_national_comparison),
    effectiveness_of_care_national_comparison = fct_relevel(effectiveness_of_care_national_comparison,
                                                            c("Below the national average", 
                                                              "Same as the national average", 
                                                              "Above the national average")),
    timeliness_of_care_national_comparison = fct_relevel(timeliness_of_care_national_comparison,
                                                            c("Below the national average", 
                                                              "Same as the national average", 
                                                              "Above the national average"))
  )
us_map <- map_data("state")
cols <- c("Below the national average" = "red", "Above the national average" = "#0A97F0", "Same as the national average" = "black")
medicare_locations_plot <- medicare %>%
  filter(state != "HI", state != "AK", long > -140, lat > 22) %>%
  ggplot() +
  geom_polygon(data = us_map, aes(x=long, y=lat, group=group),
                   color="#9EA5A9", fill = "#CED5DA") +
  geom_point(aes(x = long, y = lat), alpha = .2) +
  coord_map() +
  theme_void() +
  theme(plot.caption = element_text(hjust = .5)) +
  labs(title = "Locations of Medicare Providers", subtitle = "Medicare providers are spread thin in some Midwestern states.")
medicare_ratings_map1 <- medicare_ratings %>%
    filter(state != "HI", state != "AK", long > -140, lat > 22,
         !is.na(effectiveness_of_care_national_comparison))

effectiveness_plot <- medicare_ratings_map1 %>%
  ggplot() +
  geom_polygon(data = us_map, aes(x=long, y=lat, group=group),
                   color="#9EA5A9", fill = "#CED5DA") +
  geom_point(data = subset(medicare_ratings_map1, 
                           effectiveness_of_care_national_comparison == "Same as the national average"),
             aes(x = long, y = lat, color = effectiveness_of_care_national_comparison), 
             alpha = .2) +
  geom_point(data = subset(medicare_ratings_map1, 
                           effectiveness_of_care_national_comparison == "Above the national average"),
             aes(x = long, y = lat, color = effectiveness_of_care_national_comparison), 
             alpha = .6) +
  geom_point(data = subset(medicare_ratings_map1, 
                           effectiveness_of_care_national_comparison == "Below the national average"),
             aes(x = long, y = lat, color = effectiveness_of_care_national_comparison), 
             alpha = .4) +
  scale_color_manual(values = cols) +
  coord_map() +
  theme_void() +
  theme(plot.caption = element_text(hjust = .5)) +
  labs(title = "National Comparison of Effectiveness of Care for Medicare Providers", 
       subtitle = "Apparent clusters of ineffective providers in New York and Chicago areas.",
       color = "Effectiveness of care")
medicare_ratings_map2 <- medicare_ratings %>%
    filter(state != "HI", state != "AK", long > -140, lat > 22,
         !is.na(timeliness_of_care_national_comparison))

timeliness_plot <- medicare_ratings_map2 %>%
  ggplot() +
  geom_polygon(data = us_map, aes(x=long, y=lat, group=group),
                   color="#9EA5A9", fill = "#CED5DA") +
  geom_point(data = subset(medicare_ratings_map2, 
                           timeliness_of_care_national_comparison == "Same as the national average"),
             aes(x = long, y = lat, color = timeliness_of_care_national_comparison), 
             alpha = .2) +
  geom_point(data = subset(medicare_ratings_map2, 
                           timeliness_of_care_national_comparison == "Above the national average"),
             aes(x = long, y = lat, color = timeliness_of_care_national_comparison), 
             alpha = .6) +
  geom_point(data = subset(medicare_ratings_map2, 
                           timeliness_of_care_national_comparison == "Below the national average"),
             aes(x = long, y = lat, color = timeliness_of_care_national_comparison), 
             alpha = .4) +
  scale_color_manual(values = cols) +
  coord_map() +
  theme_void() +
  theme(plot.caption = element_text(hjust = .5)) +
  labs(title = "National Comparison of Timeliness of Care for Medicare Providers", 
       subtitle = "Timeliness suffers in East Coast and California cities.",
       color = "Timeliness of care")
medicare_locations_plot / effectiveness_plot / timeliness_plot +
  plot_layout(widths = 5, heights = 20)

cities %>%
  arrange(mean) %>%
  filter(state.x != "HI", state.x != "AK", long > -140) %>%
  ggplot() +
  geom_polygon(data = us_map, aes(x=long, y=lat, group=group),
                   color="#9EA5A9", fill = "#CED5DA") +
  geom_point(aes(x = long, y = lat, color = mean, size = mean), alpha = .2) +
  coord_map() +
  scale_size_continuous(range=c(.1,5), guide = FALSE) +
  scale_color_viridis(trans="log") +
  theme_void() +
  theme(plot.caption = element_text(hjust = .5)) +
  labs(title = "Costs Not Covered By Medicare for Inpatient Procedures", color = "Mean out-of-pocket costs ($)", subtitle = "in 2017", caption = "Average personal costs for procedures signicantly higher in some California and East Coast cities.")

Insurance

insurance$uninsured_rate_2010 <- as.numeric(str_replace_all(insurance$uninsured_rate_2010, "%", ""))

insurance$uninsured_rate_2015 <- as.numeric(str_replace_all(insurance$uninsured_rate_2015, "%", ""))


ggplot(data = insurance, mapping = aes(x = uninsured_rate_2010, y = uninsured_rate_2015)) +
  geom_point() +
  labs(x = "Insurance Rate in 2010", y = "Insurance Rate in 2015", 
       title = "Changes in Health Insurance Rates before and after the Affordable Care Act")

medicaid_clean <- insurance %>%
  filter(state_medicaid_expansion_2016 == TRUE | state_medicaid_expansion_2016 == FALSE)

ggplot(data = medicaid_clean, mapping = aes(x = state_medicaid_expansion_2016, y = medicaid_enrollment_change_2013_2016)) +
  geom_boxplot() +
  ylim(-4000, 4500000) +
  labs(x = "Did the state expand funding for Medicaid in 2016?", y = "Medicaid Enrollment Change between 2013 and 2016")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

Healthcare Facilities

# filter for hospitals in the contiguous US
hospitals <- facilities %>% 
  filter(overall_type == "hospital") %>% 
  #filter(long > -130 & long < -60) %>% 
  #filter(lat > 20 & lat < 50) %>% 
  mutate(owner = as.character(owner)) %>% 
  mutate(general_owner = case_when(owner %in% c("Government - District/Authority", "Government - Federal",
                                             "Government - Local", "Government - State") ~ "Government",
                                   owner == "Proprietary" ~ "For-Profit",
                                   TRUE ~ owner))
  
# filter for pharmacies 
pharmacies <- facilities %>% 
  filter(overall_type == "pharmacy")
### ADJUST THESE BY POPULATION??? ###
# map of hospitals
hospital_cnt <- hospitals %>% 
  group_by(state) %>% 
  summarize(total_hospitals = n(),
            total_beds = sum(beds, na.rm = TRUE),
            avg_beds = total_beds / total_hospitals)

plot_usmap(data = hospital_cnt, values = "total_hospitals") + 
  scale_fill_continuous(low = "white", high = "dark red", name = "# Hospitals", label = scales::comma) + theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

plot_usmap(data = hospital_cnt, values = "total_beds") + 
  scale_fill_continuous(low = "white", high = "dark red", name = "# Beds", label = scales::comma) + theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

plot_usmap(data = hospital_cnt, values = "avg_beds") + 
  scale_fill_continuous(low = "white", high = "dark red", name = "# Avg. Beds \nPer Hospital", label = scales::comma) + theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

# map of pharmacies
pharmacy_cnt <- pharmacies %>% 
  group_by(state) %>% 
  summarize(total_pharmacies = n())

plot_usmap(data = pharmacy_cnt, values = "total_pharmacies") + 
  scale_fill_continuous(low = "white", high = "dark red", name = "# Pharmacies", label = scales::comma) + theme(legend.position = "right")
## Warning: Use of `map_df$x` is discouraged. Use `x` instead.
## Warning: Use of `map_df$y` is discouraged. Use `y` instead.
## Warning: Use of `map_df$group` is discouraged. Use `group` instead.

# proportion of owner type by state bar graph
hospitals %>% 
  filter(general_owner != "Not Available") %>% 
  filter(state %in% c("NY", "CA", "TX", "NC")) %>% 
  ggplot(aes(x = state, fill = general_owner)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Set2") +
  theme_light() +
  labs(x = "State", y = "Proportion", fill = "Owner type",
       title = "Proportion of Hospital Owner Types Differ Largely by State")

# proportion of owner type by state table
hospitals %>% 
  group_by(state, general_owner) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>% 
  filter(general_owner == "For-Profit") %>% 
  arrange((freq))
## # A tibble: 51 x 4
## # Groups:   state [51]
##    state general_owner     n    freq
##    <fct> <chr>         <int>   <dbl>
##  1 MN    For-Profit        1 0.00694
##  2 NY    For-Profit        4 0.0144 
##  3 ME    For-Profit        1 0.0192 
##  4 MD    For-Profit        2 0.0278 
##  5 IA    For-Profit        5 0.0345 
##  6 MT    For-Profit        3 0.0435 
##  7 CT    For-Profit        2 0.0488 
##  8 RI    For-Profit        1 0.05   
##  9 OR    For-Profit        4 0.0541 
## 10 WI    For-Profit       10 0.0592 
## # … with 41 more rows
# population adjusted density of hospitals / pharmacies 

# density of hospitals vs mean SES by county

# shinyapp hospital / pharmacy finder
# - name, address, website, number
# - map